6  scTCRseq: Mapping clones from scRNA and scTCRseq to bulkTCRseq

6.1 Set up workspace

# Libraries
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Seurat)
Loading required package: SeuratObject
Loading required package: sp

Attaching package: 'SeuratObject'

The following objects are masked from 'package:base':

    intersect, t
library(dplyr)
library(scRepertoire)

6.2 Load tumor data with clones loaded with a custom scRepertoire variable: vjaa (V gene, J gene, CDR3 aa)

sc_tils <- readRDS("scRNA_TCR_Tumor_Part3.Rds")

sc_skin <- readRDS("scRNA_TCR_Skin_Part3.Rds")

6.3 Create a dataframe of clones per patient, where each row is a chain from a clone.

Tumor

# Splitting TRA and TRB
sctils_clone_chains <- sc_tils@meta.data %>%
  group_by(Patient) %>%
  dplyr::count(vjaa, Patient) %>%
  arrange(desc(n)) %>%
  dplyr::rename("tumor_vjaa_counts" = "n") %>%
  mutate(bulk_chain = strsplit(as.character(vjaa), "_")) %>%
  unnest(bulk_chain) %>%
  filter(bulk_chain != "NA;NA")

# Get clones with just 1 TRA or TRB chain
sctils_chain_one <- sctils_clone_chains %>%
  filter(str_count(bulk_chain, ";") == 1)

# Unnest clones with more than 1 TRA or TRB chain
sctils_chain_multi <- sctils_clone_chains %>%
  filter(str_count(bulk_chain, ";") > 1) %>%
  # split vjaa into 4 columns
  mutate(vj1 = str_split_i(bulk_chain, ";", 1),
         vj2 = str_split_i(bulk_chain, ";", 2),
         aa1 = str_split_i(bulk_chain, ";", 3),
         aa2 = str_split_i(bulk_chain, ";", 4),
         vjaa1 = paste0(vj1, ";", aa1),
         vjaa2 = paste0(vj2, ";", aa2)) %>%
  select(Patient, tumor_vjaa_counts, vjaa1, vjaa2, vjaa) %>%
  pivot_longer(cols = c("vjaa1", "vjaa2"), names_to = "chain", values_to = "bulk_chain") %>%
  select(-chain)

sctils_clone_chains <- rbind(sctils_chain_one, sctils_chain_multi)

# Check that all clones are unique across patients
sctils_clone_chains %>% distinct(vjaa, Patient, .keep_all = TRUE) %>% filter(duplicated(vjaa))
# A tibble: 0 × 4
# Groups:   Patient [0]
# ℹ 4 variables: Patient <chr>, vjaa <chr>, tumor_vjaa_counts <int>,
#   bulk_chain <chr>

Skin

# Splitting TRA and TRB from one clone into separate rows
scskin_clone_chains <- sc_skin@meta.data %>%
  group_by(Patient) %>%
  dplyr::count(vjaa, Patient) %>%
  arrange(desc(n)) %>%
  dplyr::rename("skin_vjaa_counts" = "n") %>%
  mutate(bulk_chain = strsplit(as.character(vjaa), "_")) %>%
  unnest(bulk_chain) %>%
  filter(bulk_chain != "NA;NA")

# Get clones with just 1 TRA or TRB chain
scskin_chain_one <- scskin_clone_chains %>%
  filter(str_count(bulk_chain, ";") == 1)

# Unnest clones with more than 1 TRA or TRB chain
scskin_chain_multi <- scskin_clone_chains %>%
  filter(str_count(bulk_chain, ";") > 1) %>%
  # split vjaa into 4 columns
  mutate(vj1 = str_split_i(bulk_chain, ";", 1),
         vj2 = str_split_i(bulk_chain, ";", 2),
         aa1 = str_split_i(bulk_chain, ";", 3),
         aa2 = str_split_i(bulk_chain, ";", 4),
         vjaa1 = paste0(vj1, ";", aa1),
         vjaa2 = paste0(vj2, ";", aa2)) %>%
  select(Patient, skin_vjaa_counts, vjaa1, vjaa2, vjaa) %>%
  pivot_longer(cols = c("vjaa1", "vjaa2"), names_to = "chain", values_to = "bulk_chain") %>%
  select(-chain)

scskin_clone_chains <- rbind(scskin_chain_one, scskin_chain_multi)

# All clones are unique across patients
scskin_clone_chains %>% distinct(vjaa, Patient, .keep_all = TRUE) %>% filter(duplicated(vjaa))
# A tibble: 0 × 4
# Groups:   Patient [0]
# ℹ 4 variables: Patient <chr>, vjaa <chr>, skin_vjaa_counts <int>,
#   bulk_chain <chr>

6.4 Merge clones into one dataframe and count clone frequency over both tumor and skin

sc_clone_chains <- sctils_clone_chains %>%
  full_join(scskin_clone_chains, by = c("Patient", "vjaa", "bulk_chain")) %>%
  replace(is.na(.), 0) %>%
  mutate(total_vjaa_counts = tumor_vjaa_counts + skin_vjaa_counts) %>%
  arrange(desc(total_vjaa_counts))

6.5 Create clone IDs based on decreasing clone frequency in both skin and tumor

sc_clone_ids <- sc_clone_chains %>%
  group_by(Patient) %>%
  distinct(Patient, vjaa) %>%
  mutate(obs = 1:n(),
         sc_clone_id = paste0(Patient, "_", obs)) %>%
  select(-obs) 

6.6 Add clone IDs to full dataframe

sc_clone_chains <- left_join(sc_clone_chains, sc_clone_ids, by = c("Patient", "vjaa")) %>%
  select(Patient, sc_clone_id, vjaa, bulk_chain, tumor_vjaa_counts, skin_vjaa_counts, total_vjaa_counts)

6.7 Reformat chains to match bulk format

sc_clone_chains <- sc_clone_chains %>% 
  # Reformat certain V genes to match bulk (bulk can't differentiate btwn certain V genes)
  mutate(bulk_chain = str_replace_all(bulk_chain, "TRBV12-3|TRBV12-4", "TRBV12-3/4"),
         bulk_chain = str_replace_all(bulk_chain, "TRBV6-2|TRBV6-3|TRBV6-5|TRBV6-6", "TRBV6-2/3/5/6"),
         bulk_chain = str_replace_all(bulk_chain, "TRAV29/DV5", "TRAV29DV5"),
         bulk_chain = str_replace_all(bulk_chain, "TRAV14/DV4", "TRAV14DV4"),
         bulk_chain = str_replace_all(bulk_chain, "TRAV23/DV6", "TRAV23DV6"),
         bulk_chain = str_replace_all(bulk_chain, "TRAV36/DV7", "TRAV36DV7"),
         bulk_chain = str_replace_all(bulk_chain, "TRAV38-2/DV8", "TRAV38-2DV8")) %>%
  # Reformat the . separator to ; as per bulk
  mutate(bulk_chain = str_replace_all(bulk_chain, fixed("."), ";"),)

6.8 Add scclone ID and tumor, skin and total counts to scRNA objects

# Make sure there's barcode names
sc_skin$barcodes <- rownames(sc_skin@meta.data)
sc_tils$barcodes <- rownames(sc_tils@meta.data)

sc_clones_unique <- sc_clone_chains %>%
  select(Patient, vjaa, skin_vjaa_counts, tumor_vjaa_counts, sc_clone_id) %>%
  distinct(.keep_all = TRUE)

sc_skin@meta.data <- sc_skin@meta.data %>%
  left_join(sc_clones_unique, by = c("Patient", "vjaa"))
rownames(sc_skin@meta.data) <- sc_skin$barcodes

sc_tils@meta.data <- sc_tils@meta.data %>%
  left_join(sc_clones_unique, by = c("Patient", "vjaa"))
rownames(sc_tils@meta.data) <- sc_tils$barcodes

6.9 Save scRNA objects

saveRDS(sc_tils, "scRNA_TCR_Tumor_Part4.Rds")

saveRDS(sc_skin, "scRNA_TCR_Skin_Part4.Rds")

6.10 Save clones

write.csv(sc_clone_chains, "scRNA_TCR_bulkTCR_chain_mapping_Part4.csv", row.names = FALSE)

6.11 Get session info

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] scRepertoire_2.0.0 Seurat_5.1.0       SeuratObject_5.0.2 sp_2.2-0          
 [5] lubridate_1.9.4    forcats_1.0.0      stringr_1.5.1      dplyr_1.1.4       
 [9] purrr_1.0.4        readr_2.1.5        tidyr_1.3.1        tibble_3.2.1      
[13] ggplot2_3.5.1      tidyverse_2.0.0   

loaded via a namespace (and not attached):
  [1] cubature_2.1.1              RcppAnnoy_0.0.22           
  [3] splines_4.3.2               later_1.4.1                
  [5] bitops_1.0-9                R.oo_1.27.0                
  [7] polyclip_1.10-7             fastDummies_1.7.5          
  [9] lifecycle_1.0.4             globals_0.16.3             
 [11] lattice_0.22-7              MASS_7.3-60.0.1            
 [13] magrittr_2.0.3              plotly_4.10.4              
 [15] rmarkdown_2.29              remotes_2.5.0              
 [17] httpuv_1.6.15               sctransform_0.4.1          
 [19] flexmix_2.3-20              spam_2.11-1                
 [21] spatstat.sparse_3.1-0       reticulate_1.42.0          
 [23] cowplot_1.1.3               pbapply_1.7-2              
 [25] RColorBrewer_1.1-3          abind_1.4-8                
 [27] zlibbioc_1.48.2             Rtsne_0.17                 
 [29] GenomicRanges_1.54.1        R.utils_2.13.0             
 [31] ggraph_2.2.1                BiocGenerics_0.48.1        
 [33] RCurl_1.98-1.17             nnet_7.3-20                
 [35] tweenr_2.0.3                evmix_2.12                 
 [37] GenomeInfoDbData_1.2.11     IRanges_2.36.0             
 [39] S4Vectors_0.40.2            ggrepel_0.9.5              
 [41] irlba_2.3.5.1               listenv_0.9.1              
 [43] spatstat.utils_3.1-0        SeuratWrappers_0.3.5       
 [45] iNEXT_3.0.1                 MatrixModels_0.5-3         
 [47] goftest_1.2-3               RSpectra_0.16-2            
 [49] spatstat.random_3.3-1       fitdistrplus_1.2-2         
 [51] parallelly_1.41.0           leiden_0.4.3.1             
 [53] codetools_0.2-20            DelayedArray_0.28.0        
 [55] ggforce_0.4.2               tidyselect_1.2.1           
 [57] farver_2.1.2                viridis_0.6.5              
 [59] matrixStats_1.5.0           stats4_4.3.2               
 [61] spatstat.explore_3.3-2      jsonlite_1.8.9             
 [63] tidygraph_1.3.1             progressr_0.15.1           
 [65] ggridges_0.5.6              ggalluvial_0.12.5          
 [67] survival_3.8-3              tools_4.3.2                
 [69] stringdist_0.9.12           ica_1.0-3                  
 [71] Rcpp_1.0.14                 glue_1.8.0                 
 [73] gridExtra_2.3               SparseArray_1.2.4          
 [75] xfun_0.50                   MatrixGenerics_1.14.0      
 [77] GenomeInfoDb_1.38.8         withr_3.0.2                
 [79] BiocManager_1.30.25         fastmap_1.2.0              
 [81] SparseM_1.84-2              rsvd_1.0.5                 
 [83] digest_0.6.37               timechange_0.3.0           
 [85] R6_2.6.1                    mime_0.13                  
 [87] colorspace_2.1-1            scattermore_1.2            
 [89] tensor_1.5                  spatstat.data_3.1-2        
 [91] R.methodsS3_1.8.2           generics_0.1.3             
 [93] data.table_1.15.4           graphlayouts_1.1.1         
 [95] httr_1.4.7                  htmlwidgets_1.6.4          
 [97] S4Arrays_1.2.1              uwot_0.2.3                 
 [99] pkgconfig_2.0.3             gtable_0.3.6               
[101] modeltools_0.2-23           lmtest_0.9-40              
[103] SingleCellExperiment_1.24.0 XVector_0.42.0             
[105] htmltools_0.5.8.1           dotCall64_1.2              
[107] scales_1.3.0                Biobase_2.62.0             
[109] png_0.1-8                   spatstat.univar_3.0-0      
[111] ggdendro_0.2.0              knitr_1.49                 
[113] rstudioapi_0.17.1           rjson_0.2.23               
[115] tzdb_0.5.0                  reshape2_1.4.4             
[117] nlme_3.1-168                zoo_1.8-13                 
[119] cachem_1.1.0                KernSmooth_2.23-26         
[121] parallel_4.3.2              miniUI_0.1.1.1             
[123] pillar_1.10.1               grid_4.3.2                 
[125] vctrs_0.6.5                 RANN_2.6.2                 
[127] VGAM_1.1-13                 promises_1.3.2             
[129] xtable_1.8-4                cluster_2.1.8.1            
[131] evaluate_1.0.1              truncdist_1.0-2            
[133] cli_3.6.3                   compiler_4.3.2             
[135] rlang_1.1.5                 crayon_1.5.3               
[137] future.apply_1.11.3         plyr_1.8.9                 
[139] stringi_1.8.4               viridisLite_0.4.2          
[141] deldir_2.0-4                munsell_0.5.1              
[143] gsl_2.1-8                   lazyeval_0.2.2             
[145] spatstat.geom_3.3-2         quantreg_6.1               
[147] Matrix_1.6-5                RcppHNSW_0.6.0             
[149] hms_1.1.3                   patchwork_1.3.0            
[151] future_1.34.0               shiny_1.9.1                
[153] SummarizedExperiment_1.32.0 evd_2.3-7.1                
[155] ROCR_1.0-11                 igraph_2.0.3               
[157] memoise_2.0.1